package cl.crawler;

import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.NicelyResynchronizingAjaxController;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlAnchor;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.html.HtmlParagraph;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.junit.Test;

import java.io.IOException;

/**
 * Exploratory crawler tests that fetch a Baidu Pan (pan.baidu.com) WAP share
 * home page and dump the retrieved markup to standard output.
 *
 * <p>Two strategies are exercised: a plain HTTP fetch via jsoup, and a
 * headless-browser fetch via HtmlUnit. Both tests hit the live network and
 * make no assertions; they exist to inspect the returned page by eye.
 *
 * <p>Created by 抽离 on 2017/7/5.
 */
public class PanCrawler {

    /** Share home page to crawl; {@code uk} presumably identifies the sharing user. */
    private final String url = "http://pan.baidu.com/wap/share/home?third=0&uk=168713112";

    /**
     * Fetches {@link #url} with jsoup and prints the parsed document.
     *
     * <p>An {@link IOException} is reported via its stack trace rather than
     * failing the test, so the test is best-effort.
     */
    @Test
    public void testCrawlerPan() {
        try {
            Document document = Jsoup
                    .connect(url)
                    .get();
            System.out.println("：" + document);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Fetches {@link #url} with an HtmlUnit {@link WebClient} emulating
     * Firefox 52, waits briefly for background JavaScript, and prints the
     * resulting page as XML.
     *
     * <p>An {@link IOException} is reported via its stack trace rather than
     * failing the test, so the test is best-effort.
     */
    @Test
    public void testCrawlerWithHtmlUnit() {
        // try-with-resources: WebClient is AutoCloseable and must be closed to
        // release its windows and background JavaScript executor threads.
        try (WebClient webClient = new WebClient(BrowserVersion.FIREFOX_52)) {
            HtmlPage htmlPage = webClient.getPage(url);
            // Give scripts started by the page load up to 1000 ms to finish
            // before the DOM is serialized.
            webClient.waitForBackgroundJavaScript(1000);
            System.out.println("HtmlPage：" + htmlPage.asXml());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}
